This is a step by step my pathway analysis of the single cell sequencing of reprogrammed human dendritic cells

library(Seurat)
library(dplyr)
library(monocle)
library(edgeR)
library(clusterProfiler)
library(pheatmap)
library(gplots)

1 Read in the data after filtering out cells and batch correction:

pbmc <- readRDS("~/Documents/Lund/Vor2018_Haust2014/SingleCell/filtered.idc.rds")
pbmc
## An object of class Seurat 
## 22286 features across 3693 samples within 2 assays 
## Active assay: integrated (2684 features)
##  1 other assay present: RNA
##  2 dimensional reductions calculated: pca, tsne

Overall analysis of the data:

tSNE

DimPlot(object = pbmc,reduction.key = "tSNE_1", group.by="cell_type", cols=c("grey", "darkgoldenrod3", "darkred", "dodgerblue4","darkgreen", "darkorchid4"))

Heatmap

Number of genes in each cluster

Idents(object = pbmc) <- pbmc$cell_type ## Run for supervised

#pbmc.markers <- FindAllMarkers(object = pbmc, only.pos = T, min.pct = 0.1, logfc.threshold = 0.3)
pbmc.markers <- readRDS("load_obj/pbmc.markers")
table(pbmc.markers$cluster)
## 
##   HEF idc_3 idc_9  cDC1  cDC2   pDC 
##   258     1   180     0     0    58

Top two genes in each cluster

pbmc.markers %>% 
  group_by(cluster) %>% 
  top_n(n = 2, wt = avg_logFC)
top10 <- pbmc.markers %>% 
  group_by(cluster) %>% 
  top_n(n = 20, wt = avg_logFC)

Heatmap of top 20 genes in each cluster

DoHeatmap(object = pbmc, features = top10$gene, group.by = "cell_type") + NoLegend()

2 Filtering lowly expressed genes:

Starting gene number from Ilia:

nrow(pbmc@assays$RNA)
## [1] 19602
#Connecting toghter name and cell type:
number.list <- unlist(strsplit(colnames(pbmc@assays$RNA@counts),"-"))[seq(2,2*3693,2)]
HSMM_sample_sheet_name <- data.frame(cell_name = number.list, row.names = colnames(pbmc@assays$RNA@counts))
HSMM_sample_sheet <- data.frame(cell_type = ifelse(number.list == 1, "HEF",
                                          ifelse(number.list == 2, "idc_3",
                                                 ifelse(number.list==3, "idc_9",
                                                        ifelse(number.list==4, "cDC1",
                                                               ifelse(number.list==5, "cDC2","pDC"))))), row.names = colnames(pbmc@assays$RNA@counts))
# Steps in Monocle
pd <- new("AnnotatedDataFrame", data = HSMM_sample_sheet)
HSMM_gene_annotation <- data.frame(gene_short_name = rownames(pbmc@assays$RNA@counts), row.names = rownames(pbmc@assays$RNA@counts))
fd <- new("AnnotatedDataFrame", data = HSMM_gene_annotation)
HSMM <- newCellDataSet(as.matrix(pbmc@assays$RNA@counts), phenoData = pd, 
                       featureData = fd, expressionFamily=negbinomial.size())
pData(HSMM)$cell_name = HSMM_sample_sheet_name
HSMM <- estimateSizeFactors(HSMM)
HSMM <- estimateDispersions(HSMM)
## Removing 186 outliers
HSMM <- detectGenes(HSMM, min_expr = 0.1)
expressed_genes <- row.names(subset(fData(HSMM),
                                    num_cells_expressed >= 10))
pData(HSMM)$Total_mRNAs <- Matrix::colSums(exprs(HSMM))
HSMM <- HSMM[,pData(HSMM)$Total_mRNAs < 1e6] ## no cells above 1e6

Number of genes used in analysis

length(expressed_genes)
## [1] 16476

3 EdgeR

# counts <-  exprs(HSMM[expressed_genes,])
# colnames(counts) <- pData(HSMM)[,1]
# 
# cds <- DGEList(counts=counts,group=pData(HSMM)[,1])
# cds <-calcNormFactors(cds)
# cds <- estimateDisp(cds)
cds <- readRDS("load_obj/cds")
head(cds$samples)

3.1 Pairwise comparison

# gene.sign.edgeR = list()
# sample.list.edgeR <- levels(pData(HSMM)$cell_type)
# for (i in 1:(length(sample.list.edgeR)-1)){
#   for (j in (i+1):length(sample.list.edgeR)) {
#     compare.sample.edgeR <- paste0(sample.list.edgeR[i], "|", sample.list.edgeR[j])
#     print(compare.sample.edgeR)
#     compare.edgeR <- exactTest(cds, pair=c(sample.list.edgeR[i], sample.list.edgeR[j]))
#     compare.edgeR <- compare.edgeR$table
#     compare.edgeR$p.adjust <- p.adjust(compare.edgeR$PValue, "BH")
#     
#     sig_gene.edgeR<- compare.edgeR[compare.edgeR$p.adjust < 0.05,]
#     sig_gene.edgeR <- sig_gene.edgeR[abs(sig_gene.edgeR$logFC) > 0.5,]
#     
#     write.table(sig_gene.edgeR, file=paste0("edgeR_sig_gene_",
#                                             compare.sample.edgeR,".txt"),
#                 row.names = T, col.names = T, quote = F, sep="\t")
#     
#     gene.sign.edgeR[[compare.sample.edgeR]] = compare.edgeR
#   }
# }
gene.sign.edgeR <- readRDS("load_obj/gene.sign.edgeR")

Day 3 vs Hefs

gene.sign.edgeR$`HEF|idc_3`[order(abs(gene.sign.edgeR$`HEF|idc_3`$logFC), decreasing = T),]

Day 9 vs Hefs

gene.sign.edgeR$`HEF|idc_9`[order(abs(gene.sign.edgeR$`HEF|idc_9`$logFC), decreasing = T),]

Day 9 vs Day 3

gene.sign.edgeR$`idc_3|idc_9`[order(abs(gene.sign.edgeR$`idc_3|idc_9`$logFC), decreasing = T),]

Day 9 vs cDC1

gene.sign.edgeR$`cDC1|idc_9`[order(abs(gene.sign.edgeR$`cDC1|idc_9`$logFC), decreasing = T),]

Day 3 vs cDC1

gene.sign.edgeR$`cDC1|idc_3`[order(abs(gene.sign.edgeR$`cDC1|idc_3`$logFC), decreasing = T),]

3.2 Heatmap of DE genes:

cpm_counts_sig_hef_idc_upDown <- cpm(cds)
counts_scaled_sig_hef_id_upDown <-t(scale(t(cpm_counts_sig_hef_idc_upDown)))

MinMax <- function(data, min, max){
  data2 <- data
  data2[data2 < min] = min
  data2[data2 > max] = max
  return(data2)
}

counts_scaled_sig_hef_id_upDown2 = MinMax(counts_scaled_sig_hef_id_upDown, -2.5, 2.5)
mycol <- colorpanel(100, "purple", "black", "yellow")

make_matrix <- function(x, sample1, sample2) {
  return_matrix <- as.matrix(counts_scaled_sig_hef_id_upDown2[rownames(x),
                                                           c(which(colnames(counts_scaled_sig_hef_id_upDown2) == sample1),
                                                             which(colnames(counts_scaled_sig_hef_id_upDown2) == sample2))])
  colnames(return_matrix) <- rownames(pData(HSMM))[c(which(HSMM$cell_type==sample1), which(HSMM$cell_type==sample2))]
  return(return_matrix)
}

make_datafram <- function(sample1, sample2) {
  d3_hef_colname_dataframe <- data.frame(Cell_type = HSMM$cell_type[c(which(HSMM$cell_type == sample1), which(HSMM$cell_type == sample2))], row.names = rownames(pData(HSMM))[c(which(HSMM$cell_type==sample1), which(HSMM$cell_type== sample2))])
}

plot_heatmap <- function(matrix, df) {
  return_plot <- pheatmap(matrix, col=mycol, show_colnames = F, cutree_rows = 2, annotation_names_row = F,
                              show_rownames = F, cluster_cols = F, annotation_col = df,
                              clustering_distance_rows = "correlation",
                              clustering_method = "ward.D2", annotation_names_col = F )
}

Day 3 vs Hefs

idc3_hef <- make_matrix(gene.sign.edgeR$`HEF|idc_3`, "idc_3", "HEF")
idc3_hef_df <- make_datafram("idc_3", "HEF")
#plot_heatmap(idc3_hef, idc3_hef_df)

Day 9 vs Hefs

idc9_hef <- make_matrix(gene.sign.edgeR$`HEF|idc_9`, "idc_9", "HEF")
idc9_hef_df <- make_datafram("idc_9", "HEF")
#plot_heatmap(idc9_hef, idc9_hef_df)

Day 9 vs Day 3

idc9_idc3 <- make_matrix(gene.sign.edgeR$`idc_3|idc_9`, "idc_9", "idc_3")
idc9_idc3_df <- make_datafram("idc_9", "idc_3")
#plot_heatmap(idc9_idc3, idc9_idc3_df)

Day 9 vs cDC1

idc9_cDC1 <- make_matrix(gene.sign.edgeR$`cDC1|idc_9`, "idc_9", "cDC1")
idc9_cDC1_df <- make_datafram("idc_9", "cDC1")
#plot_heatmap(idc9_cDC1, idc9_cDC1_df)

4 Pathway analysis

4.1 Define cluster

define_cluster <- function(x, logFC) {
  cluster <- list()
  cluster$Patways_up <- bitr(rownames(x[x[,1] > logFC, ]),
                                               fromType = "SYMBOL", toType = "ENTREZID",
                                               OrgDb = "org.Hs.eg.db")[,2]
  cluster$Patways_down <-  bitr(rownames(x[x[,1] < -logFC, ]),
                                               fromType = "SYMBOL", toType = "ENTREZID",
                                               OrgDb = "org.Hs.eg.db")[,2]
  return(cluster)
}

Day 3 vs Hefs

Numer of genes up in day 3 compared to Hef

d3_hef_cluster <- define_cluster(gene.sign.edgeR$`HEF|idc_3`, 0.5)
length(d3_hef_cluster$Patways_up) 
## [1] 1568

Numer of genes down in day 3 compared to Hef

length(d3_hef_cluster$Patways_down)
## [1] 2450

Day 9 vs Hef

Numer of genes up in day 9 compared to Hef

d9_hef_cluster <- define_cluster(gene.sign.edgeR$`HEF|idc_9`, 0.5)
length(d9_hef_cluster$Patways_up) 
## [1] 2114

Numer of genes down in day 3 compared to Hef

length(d9_hef_cluster$Patways_down)
## [1] 2682

Day 9 vs Day 3

Numer of genes up in day 9 compared to day 3

d9_d3_cluster <- define_cluster(gene.sign.edgeR$`idc_3|idc_9`, 0.5)
length(d9_d3_cluster$Patways_up) 
## [1] 2238

Numer of genes down in day 9 compared to day 3

length(d9_d3_cluster$Patways_down)
## [1] 1831

Day 9 vs cDC1

Numer of genes up in day 9 compared to cDC1

d9_cDC1_cluster <- define_cluster(gene.sign.edgeR$`cDC1|idc_9`, 0.5)
length(d9_cDC1_cluster$Patways_up) 
## [1] 5261

Numer of genes down in day 9 compared to cDC1

length(d9_cDC1_cluster$Patways_down)
## [1] 1143

Day 3 vs cDC1

Numer of genes up in day 3 compared to cDC1

d3_cDC1_cluster <- define_cluster(gene.sign.edgeR$`cDC1|idc_3`, 0.5)
length(d3_cDC1_cluster$Patways_up) 
## [1] 5299

Numer of genes down in day 3 compared to cDC1

length(d3_cDC1_cluster$Patways_down)
## [1] 1551

4.2 GO Pathway analysis Using clusterProfiler:

pathway_analysis <- function(x, ont, name) {
  results <- compareCluster(geneClusters = x, fun="enrichGO", OrgDb="org.Hs.eg.db", ont=ont, qvalueCutoff=0.05)
  saveRDS(results, file=paste0("compareClusters/single_cell/", name, "_", ont, ".Rdata"))
  return(results)
}

Cellular component

Day 3 vs Hefs

#d3_hef_CC <- pathway_analysis(d3_hef_cluster, "CC", "d3_hef")
d3_hef_CC <- readRDS("compareClusters/single_cell/d3_hef_CC.Rdata")
dotplot(d3_hef_CC, showCategory=5) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

Day 9 vs Hef

#d9_hef_CC <- pathway_analysis(d9_hef_cluster, "CC", name="d9_hef")
d9_hef_CC <- readRDS("compareClusters/single_cell/d9_hef_CC.Rdata")
dotplot(d9_hef_CC, showCategory=5) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

Day 9 vs Day 3

#d9_d3_CC <- pathway_analysis(d9_d3_cluster, "CC", name="d9_d3")
d9_d3_CC <- readRDS("compareClusters/single_cell/d9_d3_CC.Rdata")
dotplot(d9_d3_CC, showCategory=5) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

Day 9 vs cDC1

#d9_cDC1_CC <- pathway_analysis(d9_cDC1_cluster, "CC", name="d9_cDC1")
d9_cDC1_CC <- readRDS("compareClusters/single_cell/d9_cDC1_CC.Rdata")
dotplot(d9_cDC1_CC, showCategory=5) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

Day 3 vs cDC1

#d3_cDC1_CC <- pathway_analysis(d3_cDC1_cluster, "CC", name="d3_cDC1")
d3_cDC1_CC <- readRDS("compareClusters/single_cell/d3_cDC1_CC.Rdata")
dotplot(d3_cDC1_CC, showCategory=5) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

Biological process

Day 3 vs Hefs

#d3_hef_BP <- pathway_analysis(d3_hef_cluster, "BP", "d3_hef")
d3_hef_BP <- readRDS("compareClusters/single_cell/d3_hef_BP.Rdata")
dotplot(d3_hef_BP, showCategory=5) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

Day 9 vs Hefs

#d9_hef_BP <- pathway_analysis(d9_hef_cluster, "BP", "d9_hef")
d9_hef_BP <- readRDS("compareClusters/single_cell/d9_hef_BP.Rdata")
dotplot(d9_hef_BP, showCategory=5) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

Day 9 vs Day 3

#d9_d3_BP <- pathway_analysis(d9_d3_cluster, "BP", "d9_d3")
d9_d3_BP <- readRDS("compareClusters/single_cell/d9_d3_BP.Rdata")
dotplot(d9_d3_BP, showCategory=5) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

Day 9 vs cDC1

#d9_cDC1_BP <- pathway_analysis(d9_cDC1_cluster, "BP", "d9_cDC1")
d9_cDC1_BP <- readRDS("compareClusters/single_cell/d9_cDC1_BP.Rdata")
dotplot(d9_cDC1_BP, showCategory=5) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

Day 3 vs cDC1

#d3_cDC1_BP <- pathway_analysis(d3_cDC1_cluster, "BP", "d3_cDC1")
d3_cDC1_BP <- readRDS("compareClusters/single_cell/d3_cDC1_BP.Rdata")
dotplot(d3_cDC1_BP, showCategory=5) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

4.3 KEGG Pathway analysis Using clusterProfiler:

KEGG_pathway_analysis <- function(x, name) {
  results <- compareCluster(geneClusters = x, fun="enrichKEGG", organism="hsa", keyType="kegg", qvalueCutoff=0.05)
  saveRDS(results, file=paste0("compareClusters/single_cell/", name, "_", "KEGG.Rdata"))
  return(results)
}

Day 3 vs Hefs

#d3_hef_KEGG <- KEGG_pathway_analysis(d3_hef_cluster, name="d3_hef")
d3_hef_KEGG <- readRDS("compareClusters/single_cell/d3_hef_KEGG.Rdata")
dotplot(d3_hef_KEGG, showCategory=5) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

Day 9 vs Hefs

#d9_hef_KEGG <- KEGG_pathway_analysis(d9_hef_cluster, name="d9_hef")
d9_hef_KEGG <- readRDS("compareClusters/single_cell/d9_hef_KEGG.Rdata")
dotplot(d9_hef_KEGG, showCategory=5) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

Day 9 vs day 3

#d9_d3_KEGG <- KEGG_pathway_analysis(d9_d3_cluster, name="d9_d3")
d9_d3_KEGG <- readRDS("compareClusters/single_cell/d9_d3_KEGG.Rdata")
dotplot(d9_d3_KEGG, showCategory=5) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

Day 9 vs cDC1

d9_cDC1_KEGG <- KEGG_pathway_analysis(d9_cDC1_cluster, name="d9_cDC1")
#d9_cDC1_KEGG <- readRDS("compareClusters/single_cell/d9_cDC1_KEGG.Rdata")
dotplot(d9_cDC1_KEGG, showCategory=5) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

Day 3 vs cDC1

#d3_cDC1_KEGG <- KEGG_pathway_analysis(d3_cDC1_cluster, name="d3_cDC1")
d3_cDC1_KEGG <- readRDS("compareClusters/single_cell/d3_cDC1_KEGG.Rdata")
dotplot(d3_cDC1_KEGG, showCategory=5) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

5 Plotting gene expression of individual genes

gene_list_sel <- c("CTSS","CD68", "M6PR", "MCL1", "LAMP2", "LRRK2", "MYD88", "TICAM1", "TFEC", "STAT1")
for (i in 1:length(gene_list_sel)) {
  to_be_tested <- row.names(subset(fData(HSMM),
                                   gene_short_name %in% gene_list_sel[i]))
  cds_subset <- HSMM[to_be_tested,]
  cds_subset$cell_type <- factor(cds_subset$cell_typ, levels = c("HEF", "idc_3", "idc_9", "cDC1", "cDC2", "pDC"))
  
  cbPalette=c("grey", "darkgoldenrod3", "darkred", "dodgerblue4","darkgreen", "darkorchid4")
  print(monocle::plot_genes_violin(cds_subset=cds_subset,
                             grouping = "cell_type",
                             color_by = "cell_type",
                             nrow= 2,
                             ncol = NULL,
                             plot_trend = TRUE,  panel_order=gene_list_sel)+ scale_fill_manual(values = cbPalette))
}
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

6 Velocyto

# library(velocyto.R)
# library(pagoda2)
# seurat.object.final = pbmc.idc
# ldat <- read.loom.matrices("~/Downloads/merge.loom")
# 
# emat <- ldat$spliced
# nmat <- ldat$unspliced
# 
# type_run <- unlist(strsplit(colnames(emat),":"))[seq(1,dim(emat)[2]*2,2)]
# cells_table <- unlist(strsplit(colnames(emat),":"))[seq(2,dim(emat)[2]*2,2)]
# value.iter = 0
# new_cols = c()
# for (i in 1:length(type_run)){
#   value.iter = value.iter + 1
#   if (type_run[i] == "hef_run"){
#       new_cols = c(new_cols,paste0(unlist(strsplit(cells_table[i],"x")),"-1"))
#   } else if (type_run[i] == "cd45d3_run"){
#       new_cols = c(new_cols,paste0(unlist(strsplit(cells_table[i],"x")),"-2"))
#   } else if (type_run[i] == "cd45d9_run"){
#       new_cols = c(new_cols,paste0(unlist(strsplit(cells_table[i],"x")),"-3"))
#   } else if (type_run[i] == "cd141v3_run"){
#       new_cols = c(new_cols,paste0(unlist(strsplit(cells_table[i],"x")),"-4"))
#   } else if (type_run[i] == "cd1c_run"){
#       new_cols = c(new_cols,paste0(unlist(strsplit(cells_table[i],"x")),"-5"))
#   } else if (type_run[i] == "cd123_run"){
#       new_cols = c(new_cols,paste0(unlist(strsplit(cells_table[i],"x")),"-6"))
#   } else {
#       new_cols = c(new_cols,paste0("X",value.iter))
#   }  
# }
# 
# colnames(emat) = new_cols
# colnames(nmat) = new_cols
# 
# emat <- emat[,rownames(seurat.object.final@meta.data)]; 
# nmat <- nmat[,rownames(seurat.object.final@meta.data)]; 
# 
# seurat.object.final <- FindNeighbors(object = seurat.object.final, dims = 1:sel.dim2)
# seurat.object.final <- FindClusters(object = seurat.object.final, resolution = 0.2)
# 
# #cluster.label <- seurat.object.final@active.ident
# cluster.label <- as.factor(as.numeric(seurat.object.final@meta.data$cell_type))
# names(cluster.label) = rownames(seurat.object.final@meta.data)
# #cell.colors <- pagoda2:::fac2col(cluster.label)
# all.gamma = c("#A9A8A6","#EFA46A","#C22D2D","#2A6DA1","#58AE69","#875F98")
# cell.colors = c()
# for (i in 1:length(cluster.label)){
#   cell.colors = c(cell.colors, all.gamma[as.numeric(cluster.label[i])])
#   }
# names(cell.colors) = rownames(seurat.object.final@meta.data)
# 
# emb <- seurat.object.final@reductions$tsne@cell.embeddings
# cell.dist <- as.dist(1-armaCor(t(seurat.object.final@reductions$pca@cell.embeddings)))
# 
# emat <- filter.genes.by.cluster.expression(emat,cluster.label,min.max.cluster.average = 0.5)
# nmat <- filter.genes.by.cluster.expression(nmat,cluster.label,min.max.cluster.average = 0.05)
# 
# fit.quantile <- 0.02
# rvel.cd <- gene.relative.velocity.estimates(emat,nmat,deltaT=1,kCells=20,cell.dist=cell.dist,fit.quantile=fit.quantile)
# 
# show.velocity.on.embedding.cor(emb,rvel.cd,n=300,scale='sqrt',cell.colors=ac(cell.colors,alpha=0.5),cex=0.8,arrow.scale=5,show.grid.flow=TRUE,min.grid.cell.mass=0.5,grid.n=40,arrow.lwd=1,do.par=F,cell.border.alpha = 0.1)